Lecture 16¶
Oct 30, 2024
We'll discuss saving SageObjects to files and NumPy
Working with files.¶
(This section was covered last class.)
pi_n = pi.n(digits=1000)
pi_n
3.141592653589793238462643383279502884197169399375105820974944592307816406286208998628034825342117067982148086513282306647093844609550582231725359408128481117450284102701938521105559644622948954930381964428810975665933446128475648233786783165271201909145648566923460348610454326648213393607260249141273724587006606315588174881520920962829254091715364367892590360011330530548820466521384146951941511609433057270365759591953092186117381932611793105118548074462379962749567351885752724891227938183011949129833673362440656643086021394946395224737190702179860943702770539217176293176752384674818467669405132000568127145263560827785771342757789609173637178721468440901224953430146549585371050792279689258923542019956112129021960864034418159813629774771309960518707211349999998372978049951059731732816096318595024459455346908302642522308253344685035261931188171010003137838752886587533208381420617177669147303598253490428755468731159562863882353787593751957781857780532171226806613001927876611195909216420199
sqrt2_n = sqrt(2).n(digits=1000)
sqrt2_n
1.414213562373095048801688724209698078569671875376948073176679737990732478462107038850387534327641572735013846230912297024924836055850737212644121497099935831413222665927505592755799950501152782060571470109559971605970274534596862014728517418640889198609552329230484308714321450839762603627995251407989687253396546331808829640620615258352395054745750287759961729835575220337531857011354374603408498847160386899970699004815030544027790316454247823068492936918621580578463111596668713013015618568987237235288509264861249497715421833420428568606014682472077143585487415565706967765372022648544701585880162075847492265722600208558446652145839889394437092659180031138824646815708263010059485870400318648034219489727829064104507263688131373985525611732204024509122770022694112757362728049573810896750401836986836845072579936472906076299694138047565482372899718032680247442062926912485905218100445984215059112024944134172853147810580360337107730918286931471017111168391658172688941975871658215212822951848847
# Open the file 'pi.txt' for writing 'w':
file = open('pi2.txt','w')
# Write a first line:
file.write(str(pi_n) + '\n') # The \n is the end of line character
# Write a second line:
file.write(str(sqrt2_n) + '\n')
file.close()
F = RealField(ceil(1000*log(10)/log(2)))
F
Real Field with 3322 bits of precision
# Open the file 'pi.txt' for reading 'r':
file = open('pi.txt','r')
pi_n2 = F(file.readline()[:-1])
sqrt2_n2 = F(file.readline()[:-1])
file.close()
pi_n - pi_n2
0.000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
sqrt2_n - sqrt2_n2
1.90276169491197089171305047781199743768583199909521552203339710520257386806628280317717710659022732987398150851938733396829652370994524211450003026905963786745979914806821721321706933041532994582530851421118834119884738392190124101967878750176539429468389974064684253444555973939392494038301285007904666990883257895091382485294015087362389513510749912640479999632404433498368488280453297259389515624473947727265298543150873710155450983501994979564647122046083564456640975863296994962084519167119734895441308420997382490968896184125029915849625109713693625840708102917735105610381325503311542178820893768963344542025367929140360685382670995405175528187976655588577099869409721946329252914034100407793543986494421234567102066136099298479977330187110753124959952473727481554761387204375547351052469184757564168119788194274802348955254419292944038849924787447358790507985808493898867685550773485668022138039440752438909244880840102894402388729447154952309853786750313423924712681316032387654363555131068e-1000
To read all the lines you can do:
# Open the file 'pi.txt' for reading 'r':
file = open('pi.txt','r')
for line in file.readlines():
print(repr(line))
file.close()
'3.141592653589793238462643383279502884197169399375105820974944592307816406286208998628034825342117067982148086513282306647093844609550582231725359408128481117450284102701938521105559644622948954930381964428810975665933446128475648233786783165271201909145648566923460348610454326648213393607260249141273724587006606315588174881520920962829254091715364367892590360011330530548820466521384146951941511609433057270365759591953092186117381932611793105118548074462379962749567351885752724891227938183011949129833673362440656643086021394946395224737190702179860943702770539217176293176752384674818467669405132000568127145263560827785771342757789609173637178721468440901224953430146549585371050792279689258923542019956112129021960864034418159813629774771309960518707211349999998372978049951059731732816096318595024459455346908302642522308253344685035261931188171010003137838752886587533208381420617177669147303598253490428755468731159562863882353787593751957781857780532171226806613001927876611195909216420199\n' '1.414213562373095048801688724209698078569671875376948073176679737990732478462107038850387534327641572735013846230912297024924836055850737212644121497099935831413222665927505592755799950501152782060571470109559971605970274534596862014728517418640889198609552329230484308714321450839762603627995251407989687253396546331808829640620615258352395054745750287759961729835575220337531857011354374603408498847160386899970699004815030544027790316454247823068492936918621580578463111596668713013015618568987237235288509264861249497715421833420428568606014682472077143585487415565706967765372022648544701585880162075847492265722600208558446652145839889394437092659180031138824646815708263010059485870400318648034219489727829064104507263688131373985525611732204024509122770022694112757362728049573810896750401836986836845072579936472906076299694138047565482372899718032680247442062926912485905218100445984215059112024944134172853147810580360337107730918286931471017111168391658172688941975871658215212822951848847\n'
Saving and loading Sage objects¶
We can save and load Sage objects from a file. (This will not work for Python objects like lists and dictonaries.)
M = matrix([[pi, sqrt(2)],[sin(1/7), cos(sqrt(3))]])
M
[ pi sqrt(2)] [ sin(1/7) cos(sqrt(3))]
M.save('m_file.sobj')
MM = load('m_file.sobj')
MM
[ pi sqrt(2)] [ sin(1/7) cos(sqrt(3))]
d = {'M': M}
d
{'M': [ pi sqrt(2)] [ sin(1/7) cos(sqrt(3))]}
d.save()
--------------------------------------------------------------------------- AttributeError Traceback (most recent call last) Cell In[14], line 1 ----> 1 d.save() AttributeError: 'dict' object has no attribute 'save'
Pickling¶
You should use Pickling if you want to save more than just Sage objects that support save
. To see more check the reference on object persistence.
import pickle
d = {'M': M}
We can use pickle to convert a Python object into a byte string:
s = pickle.dumps(d)
s
b'\x80\x04\x95\xc7\x02\x00\x00\x00\x00\x00\x00}\x94\x8c\x01M\x94\x8c\x13sage.matrix.matrix0\x94\x8c\x08unpickle\x94\x93\x94(\x8c!sage.matrix.matrix_symbolic_dense\x94\x8c\x15Matrix_symbolic_dense\x94\x93\x94\x8c$sage.structure.unique_representation\x94\x8c\x08unreduce\x94\x93\x94\x8c\x18sage.matrix.matrix_space\x94\x8c\x0bMatrixSpace\x94\x93\x94(\x8c\x12sage.symbolic.ring\x94\x8c\x10the_SymbolicRing\x94\x93\x94)R\x94K\x02K\x02\x89h\x07t\x94}\x94\x87\x94R\x94\x89N]\x94(\x8c\x18sage.symbolic.expression\x94\x8c\nExpression\x94\x93\x94)\x81\x94K\x00]\x94\x8c.GARC\x03\x05constant\x00class\x00pi\x00name\x00sage_ex\x00\x01\x04\x00\x01\x02\n\x00\x1a\x02\x94\x87\x94bh\x19)\x81\x94K\x00]\x94\x8cTGARC\x03\npower\x00class\x00T\x002\x00S\x00numeric\x00basis\x001/2\x00exponent\x00sage_ex\x00\x01\t\x02\x03\x03\x11\x01"\x03\n\x05\x03\x11\x04"\x07\n\x05\x03\n\x003\x00C\x01\x94\x87\x94bh\x19)\x81\x94K\x00]\x94\x8cUGARC\x03\x0bfunction\x00class\x00T\x001/7\x00S\x00numeric\x00seq\x00python\x00sin\x00name\x00sage_ex\x00\x01\n\x01\x02\x03\x11\x04"\x03\n\x05\x04\n\x003\x009\x00J\x08\x94\x87\x94bh\x19)\x81\x94K\x00]\x94\x8czGARC\x03\x0ffunction\x00class\x00power\x00T\x003\x00S\x00numeric\x00basis\x001/2\x00exponent\x00seq\x00python\x00cos\x00name\x00sage_ex\x00\x01\x0e\x03\x04\x03\x19\x01*\x04\n\x06\x03\x19\x04*\x08\n\x06\x03\n\x02;\x00K\x01\x04\n\x00S\x02Y\x00j\x0c\x94\x87\x94beK\x00t\x94R\x94s.'
# Open the file 'pi.txt' for writing bytes 'wb':
file = open('d_pickle.pkl','wb')
file.write(s) # The \n is the end of line character
file.close()
# Recovery: Open for reading bytes
file = open('d_pickle.pkl','rb')
ss = file.read()
file.close()
ss
b'\x80\x04\x95\xc7\x02\x00\x00\x00\x00\x00\x00}\x94\x8c\x01M\x94\x8c\x13sage.matrix.matrix0\x94\x8c\x08unpickle\x94\x93\x94(\x8c!sage.matrix.matrix_symbolic_dense\x94\x8c\x15Matrix_symbolic_dense\x94\x93\x94\x8c$sage.structure.unique_representation\x94\x8c\x08unreduce\x94\x93\x94\x8c\x18sage.matrix.matrix_space\x94\x8c\x0bMatrixSpace\x94\x93\x94(\x8c\x12sage.symbolic.ring\x94\x8c\x10the_SymbolicRing\x94\x93\x94)R\x94K\x02K\x02\x89h\x07t\x94}\x94\x87\x94R\x94\x89N]\x94(\x8c\x18sage.symbolic.expression\x94\x8c\nExpression\x94\x93\x94)\x81\x94K\x00]\x94\x8c.GARC\x03\x05constant\x00class\x00pi\x00name\x00sage_ex\x00\x01\x04\x00\x01\x02\n\x00\x1a\x02\x94\x87\x94bh\x19)\x81\x94K\x00]\x94\x8cTGARC\x03\npower\x00class\x00T\x002\x00S\x00numeric\x00basis\x001/2\x00exponent\x00sage_ex\x00\x01\t\x02\x03\x03\x11\x01"\x03\n\x05\x03\x11\x04"\x07\n\x05\x03\n\x003\x00C\x01\x94\x87\x94bh\x19)\x81\x94K\x00]\x94\x8cUGARC\x03\x0bfunction\x00class\x00T\x001/7\x00S\x00numeric\x00seq\x00python\x00sin\x00name\x00sage_ex\x00\x01\n\x01\x02\x03\x11\x04"\x03\n\x05\x04\n\x003\x009\x00J\x08\x94\x87\x94bh\x19)\x81\x94K\x00]\x94\x8czGARC\x03\x0ffunction\x00class\x00power\x00T\x003\x00S\x00numeric\x00basis\x001/2\x00exponent\x00seq\x00python\x00cos\x00name\x00sage_ex\x00\x01\x0e\x03\x04\x03\x19\x01*\x04\n\x06\x03\x19\x04*\x08\n\x06\x03\n\x02;\x00K\x01\x04\n\x00S\x02Y\x00j\x0c\x94\x87\x94beK\x00t\x94R\x94s.'
ss == s
True
# reconstruting the object:
dd = pickle.loads(ss)
dd
{'M': [ pi sqrt(2)] [ sin(1/7) cos(sqrt(3))]}
d == dd
True
Example from research (Discuss on board)¶
NumPy¶
NumPy is a library used for numerical computations, especially in scientific and engineering applications. It is very fast, for computations involving standard floating point arithmetic.
Sage uses NumPy for some things, particularly for things when approximation by floats is appropriate. The book Computational Mathematics with SageMath briefly describes its use in numerical linear algebra and differential equations.
NumPy very useful in general when you want to manipulate a lot of data in an efficient organized way while using Python syntax. This makes it popular for things like experimental sciences, data processing, and machine learning. It is also useful for visualization, where exact arithmetic as we've been doing is typically not be necessary.
Remark: It seems the most popular pronounciation is num-pie, but I seem to have picked up num-pee somewhere. It seems there is no consensus (and lots of people do not care).
References:
The following books do not discuss Sage but do discuss Numpy. They are all freely available for download from our library.
- [Programming for Computations - Python: A Gentle Introduction to Numerical
Simulations with Python 3.6](https://cuny-cc.primo.exlibrisgroup.com/permalink/01CUNY_CC/qlf695/cdi_proquest_ebookcentral_EBC6113652) by Svein Linge and Hans Petter Langtangen, 2nd edition.
- Applied Scientific Computing With Python by Peter R. Turner, Thomas Arildsen, and Kathleen Kavanagh.
- A Primer on Scientific Programming with Python by Hans Petter Langtangen, 5th edition, 2016.
Finally there is the NumPy documentation.
Importing NumPy:
SageMath comes with Numpy, but it is not immediately available. You need to import the package.
There are two standard ways. You could just import the package:
import numpy
Then to create a NumPy array, you would do:
numpy.array([1,2,3,4])
array([1, 2, 3, 4])
To save some typing, you can import the package with:
import numpy as np
This makes the NumPy package available as np
. So, you save three characters when creating your array:
v = np.array([1,2,3,4])
Those three characters seem unimportant, but sometimes you'll have np
or numpy
all over the place... Choose one!
These are vectors:
3*v
array([ 3, 6, 9, 12])
v+v
array([2, 4, 6, 8])
Unlike with SageMath when you apply a standard function to an array, it applies it in every coordinate. For example:
cos(v)
array([ 0.54030231, -0.41614684, -0.9899925 , -0.65364362])
Remark: NumPy has a built in cosine function of its own, np.cos
. Presumably SageMath recognized that v
is a Numpy array and passes off the call to np.cos
. We get the same result:
np.cos(v)
array([ 0.54030231, -0.41614684, -0.9899925 , -0.65364362])
Compare this to applying cosine to a standard Sage vector:
w = vector([1,2,3,4])
cos(w)
--------------------------------------------------------------------------- TypeError Traceback (most recent call last) File ~/Git/sage/sage/src/sage/symbolic/function.pyx:534, in sage.symbolic.function.Function.__call__() 533 try: --> 534 args = [SR.coerce(a) for a in args] 535 except TypeError as err: File ~/Git/sage/sage/src/sage/structure/parent.pyx:1198, in sage.structure.parent.Parent.coerce() 1197 -> 1198 cpdef coerce(self, x): 1199 """ File ~/Git/sage/sage/src/sage/structure/parent.pyx:1228, in sage.structure.parent.Parent.coerce() 1227 _record_exception() -> 1228 raise TypeError(_LazyString("no canonical coercion from %s to %s", (parent(x), self), {})) 1229 else: TypeError: no canonical coercion from Ambient free module of rank 4 over the principal ideal domain Integer Ring to Symbolic Ring During handling of the above exception, another exception occurred: TypeError Traceback (most recent call last) Cell In[32], line 2 1 w = vector([Integer(1),Integer(2),Integer(3),Integer(4)]) ----> 2 cos(w) File ~/Git/sage/sage/src/sage/symbolic/function.pyx:1046, in sage.symbolic.function.BuiltinFunction.__call__() 1044 res = self._evalf_try_(*args) 1045 if res is None: -> 1046 res = super().__call__( 1047 *args, coerce=coerce, hold=hold) 1048 File ~/Git/sage/sage/src/sage/symbolic/function.pyx:547, in sage.symbolic.function.Function.__call__() 545 if callable(method): 546 return method() --> 547 raise TypeError("cannot coerce arguments: %s" % (err)) 548 549 else: # coerce == False TypeError: cannot coerce arguments: no canonical coercion from Ambient free module of rank 4 over the principal ideal domain Integer Ring to Symbolic Ring
This can lead to unexpected results. For example in Sage, multiplication of vectors is the dot product. But, in NumPy we get:
print(v)
v*v
[1 2 3 4]
array([ 1, 4, 9, 16])
With arrays, most things are computed coordinate-wise.
To get the dot product, we can do:
np.dot(v, v)
30
Arrays versus matrices¶
Numpy also has a matrix class which might seem more natural to you. Here we construct a row vector:
row = np.matrix([
[1,2,3,4]
])
row
matrix([[1, 2, 3, 4]])
Here is a column vector.
col = np.matrix([
[ 5],
[ 3],
[-2],
[ 3]
])
col
matrix([[ 5], [ 3], [-2], [ 3]])
With a NumPy matrix, multiplication is matrix multiplication.
row*col
matrix([[17]])
col*row
matrix([[ 5, 10, 15, 20], [ 3, 6, 9, 12], [-2, -4, -6, -8], [ 3, 6, 9, 12]])
Conversion between arrays and matrices¶
Say we want to convert the following NumPy matrix into an array.
m = col*row
m
matrix([[ 5, 10, 15, 20], [ 3, 6, 9, 12], [-2, -4, -6, -8], [ 3, 6, 9, 12]])
We can convert it with the NumPy constructor for array:
a = np.array(m)
a
array([[ 5, 10, 15, 20], [ 3, 6, 9, 12], [-2, -4, -6, -8], [ 3, 6, 9, 12]])
a[0][0]
5
This makes a copy of the data inside.
a[0][0] = 1000
a
array([[1000, 10, 15, 20], [ 3, 6, 9, 12], [ -2, -4, -6, -8], [ 3, 6, 9, 12]])
m
matrix([[ 5, 10, 15, 20], [ 3, 6, 9, 12], [-2, -4, -6, -8], [ 3, 6, 9, 12]])
As an alternative, you can use NumPy's as_array
.
aa = np.asarray(m)
aa
array([[ 5, 10, 15, 20], [ 3, 6, 9, 12], [-2, -4, -6, -8], [ 3, 6, 9, 12]])
aa[0][0]=99999
aa
array([[99999, 10, 15, 20], [ 3, 6, 9, 12], [ -2, -4, -6, -8], [ 3, 6, 9, 12]])
m
matrix([[99999, 10, 15, 20], [ 3, 6, 9, 12], [ -2, -4, -6, -8], [ 3, 6, 9, 12]])
Remark: When dealing with a lot of data, you often don't want to make copies of it. NumPy has lots of ways to get different "views" of the same data, which allow accessing the same data differently. Changing a view like this is nearly instantaneous.
Data types¶
Just like in Sage, Numpy objects have data types. But the system is simpler. The dtype
attribute tells you what type of objects are in an array or matrix:
print(repr(v))
v.dtype
array([1, 2, 3, 4])
dtype('int64')
Above shows that the objects in the are 64-bit integers. This means that they can overflow unlike standard Sage integers. Here we raise each coordinate to the 64th power:
v^64
array([ 1, 0, 8733086111712066817, 0])
If you put in sage exact numbers into a NumPy array you'll get a dtype
of object.
rts = np.array([sqrt(k) for k in range(10)])
rts
array([0, 1, sqrt(2), sqrt(3), 2, sqrt(5), sqrt(6), sqrt(7), 2*sqrt(2), 3], dtype=object)
repr(rts)
'array([0, 1, sqrt(2), sqrt(3), 2, sqrt(5), sqrt(6), sqrt(7), 2*sqrt(2), 3],\n dtype=object)'
print(rts)
[0 1 sqrt(2) sqrt(3) 2 sqrt(5) sqrt(6) sqrt(7) 2*sqrt(2) 3]
Note that dtypes are printed unless the dtype is int
or float
or complex
. Dtypes are generally preserved under operations.
rts^2
array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], dtype=object)
cos(rts)
array([1, cos(1), cos(sqrt(2)), cos(sqrt(3)), cos(2), cos(sqrt(5)), cos(sqrt(6)), cos(sqrt(7)), cos(2*sqrt(2)), cos(3)], dtype=object)
I don't think there are many reasons to use a dtype of object. (Perhaps if you want to make use of the organization of data NumPy provides.)
The primary use of NumPy in my opinion is for floating point arithmetic. If you enter standard precision floats into a NumPy array or matrix, it will be converted to a float dtype.
pi_fracs = np.array([(pi/k).n() for k in range(1, 11)])
pi_fracs
array([3.14159265, 1.57079633, 1.04719755, 0.78539816, 0.62831853, 0.52359878, 0.44879895, 0.39269908, 0.34906585, 0.31415927])
pi_fracs.dtype
dtype('float64')
For efficiency, this is the data type you want. You can see the following modification is bad for efficiency (dtype is object).
pi_fracs_bad = np.array([(pi/k).n(54) for k in range(1, 11)])
pi_fracs_bad
array([3.14159265358979, 1.57079632679490, 1.04719755119660, 0.785398163397448, 0.628318530717959, 0.523598775598299, 0.448798950512828, 0.392699081698724, 0.349065850398866, 0.314159265358979], dtype=object)
I entered 54
above because standard doubles have 53
bits of precision. (Recall our discussion of doubles.) You can see you don't get floats above, but you do if 54 is changed to 53:
pi_fracs_good = np.array([(pi/k).n(53) for k in range(1, 11)])
pi_fracs_good
array([3.14159265, 1.57079633, 1.04719755, 0.78539816, 0.62831853, 0.52359878, 0.44879895, 0.39269908, 0.34906585, 0.31415927])
Let's check the time it takes to runs something:
def apply_repeatedly(f, x, n):
for i in range(n):
x = f(x)
return x
Remark: I just learned that you can add underscores to large numbers to improve readability.
%time apply_repeatedly(cos, pi_fracs_bad, 100_000)
CPU times: user 1.72 s, sys: 0 ns, total: 1.72 s Wall time: 1.72 s
array([0.739085133215161, 0.739085133215161, 0.739085133215161, 0.739085133215161, 0.739085133215161, 0.739085133215161, 0.739085133215161, 0.739085133215161, 0.739085133215161, 0.739085133215161], dtype=object)
%time apply_repeatedly(cos, pi_fracs_good, 100000)
CPU times: user 88.1 ms, sys: 2.85 ms, total: 91 ms Wall time: 90 ms
array([0.73908513, 0.73908513, 0.73908513, 0.73908513, 0.73908513, 0.73908513, 0.73908513, 0.73908513, 0.73908513, 0.73908513])
Just so you see that the cause isn't that extra bit, observe you can explicitly declare the 'object' dtype.
pi_fracs_bad2 = np.array([(pi/k).n(53) for k in range(1, 11)], dtype='object')
print(pi_fracs_bad2)
%time apply_repeatedly(cos, pi_fracs_bad2, 100_000)
[3.14159265358979 1.57079632679490 1.04719755119660 0.785398163397448 0.628318530717959 0.523598775598299 0.448798950512828 0.392699081698724 0.349065850398866 0.314159265358979] CPU times: user 1.63 s, sys: 0 ns, total: 1.63 s Wall time: 1.63 s
array([0.739085133215161, 0.739085133215161, 0.739085133215161, 0.739085133215161, 0.739085133215161, 0.739085133215161, 0.739085133215161, 0.739085133215161, 0.739085133215161, 0.739085133215161], dtype=object)
Using RDF
does better but still is twice as slow as NumPy's float64
pi_fracs_bad3 = np.array([RDF(pi/k) for k in range(1, 11)], dtype='object')
print(pi_fracs_bad3)
%time apply_repeatedly(cos, pi_fracs_bad3, 100_000)
[3.141592653589793 1.5707963267948966 1.0471975511965976 0.7853981633974483 0.6283185307179586 0.5235987755982988 0.4487989505128276 0.39269908169872414 0.3490658503988659 0.3141592653589793] CPU times: user 287 ms, sys: 7.33 ms, total: 295 ms Wall time: 292 ms
array([0.7390851332151605, 0.7390851332151608, 0.7390851332151608, 0.7390851332151608, 0.7390851332151605, 0.7390851332151605, 0.7390851332151605, 0.7390851332151605, 0.7390851332151605, 0.7390851332151605], dtype=object)
You can also construct complex types. For example:
w = np.array([(I^k).n() for k in range(5)])
w
array([ 1.+0.j, 0.+1.j, -1.+0.j, 0.-1.j, 1.+0.j])
w.dtype
dtype('complex128')
Standard constructions of arrays¶
Distributing points throughout an interval:¶
NumPy's arange
works similar to range
. It has the syntax arange([start,] stop[, step,])
.
np.arange(1, 10, 1/10)
array([1. , 1.1, 1.2, 1.3, 1.4, 1.5, 1.6, 1.7, 1.8, 1.9, 2. , 2.1, 2.2, 2.3, 2.4, 2.5, 2.6, 2.7, 2.8, 2.9, 3. , 3.1, 3.2, 3.3, 3.4, 3.5, 3.6, 3.7, 3.8, 3.9, 4. , 4.1, 4.2, 4.3, 4.4, 4.5, 4.6, 4.7, 4.8, 4.9, 5. , 5.1, 5.2, 5.3, 5.4, 5.5, 5.6, 5.7, 5.8, 5.9, 6. , 6.1, 6.2, 6.3, 6.4, 6.5, 6.6, 6.7, 6.8, 6.9, 7. , 7.1, 7.2, 7.3, 7.4, 7.5, 7.6, 7.7, 7.8, 7.9, 8. , 8.1, 8.2, 8.3, 8.4, 8.5, 8.6, 8.7, 8.8, 8.9, 9. , 9.1, 9.2, 9.3, 9.4, 9.5, 9.6, 9.7, 9.8, 9.9])
I usually prefer linspace
. Here you specify the interval and say how many points you want in it:
np.linspace(1, 10, 91)
array([ 1. , 1.1, 1.2, 1.3, 1.4, 1.5, 1.6, 1.7, 1.8, 1.9, 2. , 2.1, 2.2, 2.3, 2.4, 2.5, 2.6, 2.7, 2.8, 2.9, 3. , 3.1, 3.2, 3.3, 3.4, 3.5, 3.6, 3.7, 3.8, 3.9, 4. , 4.1, 4.2, 4.3, 4.4, 4.5, 4.6, 4.7, 4.8, 4.9, 5. , 5.1, 5.2, 5.3, 5.4, 5.5, 5.6, 5.7, 5.8, 5.9, 6. , 6.1, 6.2, 6.3, 6.4, 6.5, 6.6, 6.7, 6.8, 6.9, 7. , 7.1, 7.2, 7.3, 7.4, 7.5, 7.6, 7.7, 7.8, 7.9, 8. , 8.1, 8.2, 8.3, 8.4, 8.5, 8.6, 8.7, 8.8, 8.9, 9. , 9.1, 9.2, 9.3, 9.4, 9.5, 9.6, 9.7, 9.8, 9.9, 10. ])
Note that unlike arange
both the start and end are included. You can remove them with endpoint=False
.
2D shapes:¶
# Identity matrix
np.eye(3)
array([[1., 0., 0.], [0., 1., 0.], [0., 0., 1.]])
v = np.arange(1,4)
print(f'v = {v}')
# Construct a diagonal matrix.
np.diag(v)
v = [1 2 3]
array([[1, 0, 0], [0, 2, 0], [0, 0, 3]])
Arbitrary shapes:¶
np.zeros((2,2,2))
array([[[0., 0.], [0., 0.]], [[0., 0.], [0., 0.]]])
np.ones((2,2,2))
array([[[1., 1.], [1., 1.]], [[1., 1.], [1., 1.]]])
# Random entries
np.random.default_rng().random((2,2,2))
array([[[0.80315076, 0.88860231], [0.94333593, 0.48070407]], [[0.44246217, 0.1620841 ], [0.45872105, 0.19925103]]])
Reshaping¶
v = np.arange(1, 10, 1)
v
array([1, 2, 3, 4, 5, 6, 7, 8, 9])
v.shape
(9,)
Shape (n,)
represents a vector of length n
. Here are some others:
# Row vector
v.shape = (1,9)
v
array([[1, 2, 3, 4, 5, 6, 7, 8, 9]])
# Column vector
v.shape = (9,1)
v
array([[1], [2], [3], [4], [5], [6], [7], [8], [9]])
# Rectangular matrix
v.shape = (3,3)
v
array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
A more exotic one:
w = np.arange(1, 9, 1)
w
array([1, 2, 3, 4, 5, 6, 7, 8])
# 2x2x2 cube:
w.shape = (2,2,2)
w
array([[[1, 2], [3, 4]], [[5, 6], [7, 8]]])
Entries would then be accessed with:
w[0][1][0]
3
Note that changing the shape with w.shape = ...
has the effect of changing how the data is accessed. Now changes to the underlying data are made. You can get a different view of the same data with the reshape method:
w
array([[[1, 2], [3, 4]], [[5, 6], [7, 8]]])
w_rect = w.reshape((2,4))
w_rect
array([[1, 2, 3, 4], [5, 6, 7, 8]])
w
array([[[1, 2], [3, 4]], [[5, 6], [7, 8]]])
w_rect[0][0] = 100 # Change one entry of w_rect
w_rect
array([[100, 2, 3, 4], [ 5, 6, 7, 8]])
w
array([[[100, 2], [ 3, 4]], [[ 5, 6], [ 7, 8]]])
See, it really is the same data, but with how it is accessed reorganized.
Example: Image of the unit circle under a complex map.¶
N = 10
np.arange(N)/N*2*pi.n()
array([0. , 0.62831853, 1.25663706, 1.88495559, 2.51327412, 3.14159265, 3.76991118, 4.39822972, 5.02654825, 5.65486678])
N = 10
theta = (2*pi).n()*np.arange(N)/N
z = exp(I.n()*theta)
x = real_part(z)
y = imag_part(z)
x
array([ 1. , 0.80901699, 0.30901699, -0.30901699, -0.80901699, -1. , -0.80901699, -0.30901699, 0.30901699, 0.80901699])
point2d(zip(x,y), aspect_ratio=1, figsize=4)
N = 100
theta = (I*2*pi).n()*np.arange(N)/N
z = exp(theta)
w = (z+1)^2
x = real_part(w)
y = imag_part(w)
point2d(zip(x,y), aspect_ratio=1, figsize=4)
N = 1000
theta = (I*2*pi).n()*np.arange(N)/N
z = exp(theta)
w = (z+1)^10
x = real_part(w)
y = imag_part(w)
line2d(zip(x,y), aspect_ratio=1, figsize=4)
p = minpoly(exp(2*pi*I/7))
p(var('z'))
z^6 + z^5 + z^4 + z^3 + z^2 + z + 1
N = 10000
theta = (I*2*pi).n()*np.arange(N)/N
z = exp(theta)
w = z^6 + z^5 + z^4 + z^3 + z^2 + z + 1
x = real_part(w)
y = imag_part(w)
line2d(zip(x,y), aspect_ratio=1, figsize=4)
Example: Plotting a curve in 3 space:¶
Preliminary: Demonstration of zip
:
x = [ 1, 2, 3]
y = ['a', 'b', 'c']
zip(x,y)
<zip object at 0x7f8b081224c0>
# Convert the zip object to a list so we can see it:
list(zip(x,y))
[(1, 'a'), (2, 'b'), (3, 'c')]
You can use more than two objects of the same length:
list( zip(x,y,['hey', 'you', 'guys']) )
[(1, 'a', 'hey'), (2, 'b', 'you'), (3, 'c', 'guys')]
You can see the documentation of zip
or type zip?
.
Remark: NumPy also has functions that would be useful for this when things are NumPy arrays. But it is good to know about zip when not using NumPy. Here is a demo.
x = np.array(x)
y = np.array(y)
z = np.array(['hey', 'you', 'guys'])
np.column_stack([x,y,z])
array([['1', 'a', 'hey'], ['2', 'b', 'you'], ['3', 'c', 'guys']], dtype='<U21')
Below we plot the curve in the unit sphere where $\theta = \tan \big(\frac{\pi z}{2}\big)$ is the cylinderical coordinate.
# t is 100 points in [0, 4*pi]
z = np.linspace(-1, 1, 100_000)
theta = tan(np.pi * z / 2) # Use np.pi rather than pi to avoid the object dtype
x = sqrt(1-z^2) * cos(theta)
y = sqrt(1-z^2) * sin(theta)
line3d(zip(x,y,z))
Plotting a Julia set¶
The Julia set of a complex polynomial $p(z)$ is the set of points $$J = \{z \in \mathbb C:~\text{it is not true that } \lim_{n \to \infty} |p^n(z)| = \infty\}.$$ where $p^n=p \circ \ldots \circ p \circ p$ with $n$ copies of $p$.
We will focus on the polynomial $p(z) = z^2 - 1$. When $|z|$ is large, the highest order term dominates. The map $z \mapsto z^2$ has the effect of squaring the distance from the origin (because $|z^2|=|z|^2$). Therefore, there is an $r$ such that $|p(z)|>|z|$ for every $z \in \mathbb C$ with $|z|>r$. It is an exercise to show that if $r$ satisfies this property and $|z|>r$, then $z$ can not be in the Julia set.
We can estimate this $r$. It must be the radius of a circle centered the origin that encloses the set of points where $|p(z)| \leq |z|$. Since $p$ is continuous and $|p(z)|/|z| \to \infty$ as $|z| \to \infty$, it suffices to check that the circle of radius $r$ encloses the set of points where $|p(z)| = |z|$. We can see by plotting that $r=1.7$ works. (This is not rigorous, though you can come up with a rigorous argument easily.)
p(z) = z^2 - 1
var('x y')
plt1 = implicit_plot(abs(p(x+I*y)) == abs(x+I*y),(x,-2,2),(y,-2,2))
plt1
p(z) = z^2 - 1
var('x y')
plt1 = implicit_plot(abs(p(x+I*y)) == abs(x+I*y),(x,-2,2),(y,-2,2))
plt2 = circle((0,0),1.7, color='red')
plt1 + plt2
We will make use of two things. First Numpy's meshgrid
is useful for organizing points in a grid. You can read the documentation. Observe what it does.
x = np.arange(5)
print(f'x = {x}')
y = np.arange(6,10)
print(f'y = {y}')
xmesh, ymesh = np.meshgrid(x,y)
print(f'xmesh = {xmesh}')
print(f'ymesh = {ymesh}')
x = [0 1 2 3 4] y = [6 7 8 9] xmesh = [[0 1 2 3 4] [0 1 2 3 4] [0 1 2 3 4] [0 1 2 3 4]] ymesh = [[6 6 6 6 6] [7 7 7 7 7] [8 8 8 8 8] [9 9 9 9 9]]
Writing z = xmesh + I*ymesh
we get a grid in the complex plane:
z = xmesh + I.n() * ymesh # Use I.n() to avoid object dtype.
z
array([[0.+6.j, 1.+6.j, 2.+6.j, 3.+6.j, 4.+6.j], [0.+7.j, 1.+7.j, 2.+7.j, 3.+7.j, 4.+7.j], [0.+8.j, 1.+8.j, 2.+8.j, 3.+8.j, 4.+8.j], [0.+9.j, 1.+9.j, 2.+9.j, 3.+9.j, 4.+9.j]])
Another useful idea is masked arrays. A masked array tells Numpy to ignore some values. For example (from the docs):
x = np.array([1, 2, 3, -1, 5])
x
array([ 1, 2, 3, -1, 5])
xmask = np.ma.masked_array(x, mask=[0, 0, 0, 1, 0]) # You can use 1 or True to mask a value
xmask
masked_array(data=[1, 2, 3, --, 5], mask=[False, False, False, True, False], fill_value=999999)
Now anything we do will ignore the masked entry:
xmask.sum()
11
For example, we might want to do a square root, but $\sqrt{-1}$ is bad (often) so we needed to mask the $-1$:
np.sqrt(xmask)
/tmp/ipykernel_418619/495677667.py:1: RuntimeWarning: invalid value encountered in sqrt np.sqrt(xmask)
masked_array(data=[1.0, 1.4142135623730951, 1.7320508075688772, --, 2.23606797749979], mask=[False, False, False, True, False], fill_value=999999)
(Remark: I'm not sure why Sage's sqrt
function didn't work for me.)
Actually drawing the Julia set.¶
We'll do a 35x35 grid first and then improve it.
# z will represent an NxN grid in the complex plane.
N = 35
r = 1.7
x = np.linspace(-r, r, N)
y = np.linspace(-r, r, N)
xmesh, ymesh = np.meshgrid(x,y)
z = xmesh + I.n() * ymesh
z
array([[-1.7-1.7j, -1.6-1.7j, -1.5-1.7j, ..., 1.5-1.7j, 1.6-1.7j, 1.7-1.7j], [-1.7-1.6j, -1.6-1.6j, -1.5-1.6j, ..., 1.5-1.6j, 1.6-1.6j, 1.7-1.6j], [-1.7-1.5j, -1.6-1.5j, -1.5-1.5j, ..., 1.5-1.5j, 1.6-1.5j, 1.7-1.5j], ..., [-1.7+1.5j, -1.6+1.5j, -1.5+1.5j, ..., 1.5+1.5j, 1.6+1.5j, 1.7+1.5j], [-1.7+1.6j, -1.6+1.6j, -1.5+1.6j, ..., 1.5+1.6j, 1.6+1.6j, 1.7+1.6j], [-1.7+1.7j, -1.6+1.7j, -1.5+1.7j, ..., 1.5+1.7j, 1.6+1.7j, 1.7+1.7j]])
We want to mask elements of $z$ whose absolute value exceed $r=1.7$. This is for two reasons:
- We already know these z are not in the Julia set.
- We want to avoid overflow errors. These number's absolute values will grow very quickly.
mask = abs(z)>r # Mask is an array of booleans
mask
array([[ True, True, True, ..., True, True, True], [ True, True, True, ..., True, True, True], [ True, True, True, ..., True, True, True], ..., [ True, True, True, ..., True, True, True], [ True, True, True, ..., True, True, True], [ True, True, True, ..., True, True, True]])
mask[17][17]
False
mask[3]
array([ True, True, True, True, True, True, True, True, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, True, True, True, True, True, True, True, True])
# Mask the array
z = np.ma.masked_array(z, mask)
z
masked_array( data=[[--, --, --, ..., --, --, --], [--, --, --, ..., --, --, --], [--, --, --, ..., --, --, --], ..., [--, --, --, ..., --, --, --], [--, --, --, ..., --, --, --], [--, --, --, ..., --, --, --]], mask=[[ True, True, True, ..., True, True, True], [ True, True, True, ..., True, True, True], [ True, True, True, ..., True, True, True], ..., [ True, True, True, ..., True, True, True], [ True, True, True, ..., True, True, True], [ True, True, True, ..., True, True, True]], fill_value=(1e+20+0j))
z[3]
masked_array(data=[--, --, --, --, --, --, --, --, (-0.9-1.4j), (-0.8-1.4j), (-0.7000000000000001-1.4j), (-0.6000000000000001-1.4j), (-0.5-1.4j), (-0.40000000000000013-1.4j), (-0.30000000000000004-1.4j), (-0.20000000000000018-1.4j), (-0.10000000000000009-1.4j), -1.4j, (0.09999999999999987-1.4j), (0.19999999999999996-1.4j), (0.2999999999999998-1.4j), (0.3999999999999997-1.4j), (0.4999999999999998-1.4j), (0.5999999999999999-1.4j), (0.7-1.4j), (0.8-1.4j), (0.8999999999999997-1.4j), --, --, --, --, --, --, --, --], mask=[ True, True, True, True, True, True, True, True, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, True, True, True, True, True, True, True, True], fill_value=(1e+20+0j))
Now we can iterate the polynomial. Whenever we do, we add to the masked set anything that lands outside the circle of radius $r$.
for i in range(50):
z = z^2-1
mask = abs(z)>r
z = np.ma.masked_array(z, mask)
/home/pat/Software/Sage/sage-git/lib/python3.11/site-packages/numpy/ma/core.py:467: ComplexWarning: Casting complex values to real discards the imaginary part fill_value = np.array(fill_value, copy=False, dtype=ndtype)
We plot the points that haven't escaped yet. We use Pillow, which is a standard Python image library that is included in SageMath. It makes it easy to convert a NumPy array to an image. Here we convert the mask to an image. This colors the masked (escaping) points white and the points that have not yet escaped black.
Our image is small:
from PIL import Image
Image.fromarray(mask)
Let's put it in a function:
def julia(N):
r = 1.7
x = np.linspace(-r, r, N)
y = np.linspace(-r, r, N)
xmesh, ymesh = np.meshgrid(x,y)
z = xmesh + I.n() * ymesh
mask = abs(z)>r # Mask is an array of booleans
z = np.ma.masked_array(z, mask)
for i in range(50):
z = z^2-1
mask = abs(z)>r
z = np.ma.masked_array(z, mask)
return Image.fromarray(mask)
%time julia(1000)
CPU times: user 1.24 s, sys: 141 ms, total: 1.38 s Wall time: 1.38 s
%time julia(4000)
CPU times: user 17 s, sys: 5.32 s, total: 22.3 s Wall time: 22.3 s